#!/bin/bash

### CHANGE THE FOLLOWING VARIABLES
# LLM model name, passed to both vLLM instances via --model.
MODEL='mistralai/Mistral-7B-Instruct-v0.2'

# The HF_HOME directory on the local machine. vLLM will try to find/download
# the models here.
LOCAL_HF_HOME=''

# (Optional) Hugging Face token, needed to access some special models.
HF_TOKEN=''

# Port for the first vLLM server.
PORT='8000'

# Port for the second vLLM server.
PORT2='8001'

# Docker images used by this deployment (single place to bump versions).
VLLM_IMAGE=apostacyh/vllm:lmcache-0.1.0
LMCACHE_SERVER_IMAGE=apostacyh/lmcache-server:0.1.0

# Fail fast: an empty LOCAL_HF_HOME would produce the invalid bind-mount spec
# "-v :/root/.cache/huggingface", which docker rejects only after the cache
# server container has already been started.
if [[ -z "${LOCAL_HF_HOME}" ]]; then
  printf 'error: LOCAL_HF_HOME is empty; set it to your local HF_HOME directory at the top of this script.\n' >&2
  exit 1
fi

#######################################
# Start one detached vLLM container using the LMCache example config.
# Globals:   LOCAL_HF_HOME, HF_TOKEN, MODEL, VLLM_IMAGE (read)
# Arguments: $1 - container name
#            $2 - GPU device index exposed to the container
#            $3 - port passed to vLLM via --port
# Returns:   exit status of `docker run`
#######################################
launch_vllm() {
  local name=$1
  local gpu=$2
  local port=$3

  # The --gpus value keeps its literal double quotes ("device=N"), exactly as
  # in the original invocation. All expansions are quoted so that paths or
  # values containing whitespace are passed as single arguments.
  sudo docker run --name "${name}" --runtime nvidia --gpus "\"device=${gpu}\"" \
    -v "${LOCAL_HF_HOME}:/root/.cache/huggingface" \
    --env "HF_TOKEN=${HF_TOKEN}" \
    --ipc=host \
    --network=host \
    -d "${VLLM_IMAGE}" \
    --model "${MODEL}" --gpu-memory-utilization 0.6 --port "${port}" \
    --lmcache-config-file /lmcache/LMCache/examples/example.yaml
}

sudo docker pull "${VLLM_IMAGE}"
sudo docker pull "${LMCACHE_SERVER_IMAGE}"

# Start the LMCache server on the host network. The trailing "0.0.0.0 65432"
# is passed to the image's entrypoint -- presumably the listen address and
# port; confirm against the lmcache-server image documentation.
sudo docker run --name lmcache-server --network host -d "${LMCACHE_SERVER_IMAGE}" 0.0.0.0 65432

# Two vLLM instances, one per GPU, each on its own port.
launch_vllm lmcache-vllm1 0 "${PORT}"
launch_vllm lmcache-vllm2 1 "${PORT2}"
